require(base)
require(data.table)
## Loading required package: data.table
require(datasets)
require(data.sets)
## Loading required package: data.sets
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'data.sets'
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(forecast)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
require(GGally)
## Loading required package: GGally
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
require(ggplot2)
require(ggstats)
## Loading required package: ggstats
require(graphics)
require(grDevices)
require(lubridate)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
require(methods)
require(readr)
## Loading required package: readr
require(stats)
require(utils)
require(RcppRoll)
## Loading required package: RcppRoll
require(tseries)
## Loading required package: tseries
library(readr)
library(ggplot2)
library(dplyr)
library(ggcorrplot)
library(ggridges)
processed_weather <- read_csv("processed_weather.csv",
col_types = cols(date = col_date(format = "%Y-%m-%d"),
hour = col_time(format = "%H")))
View(processed_weather)
production <- read_csv("production.csv",
col_types = cols(date = col_date(format = "%Y-%m-%d"),
hour = col_time(format = "%H")))
View(production)
processed_weather$location <- paste(processed_weather$lat, processed_weather$lon, sep = "_" )
setDT(processed_weather)
weather_wide = dcast(processed_weather, date + hour ~ location, value.var = c("dswrf_surface", "tcdc_low.cloud.layer","tcdc_middle.cloud.layer","tcdc_high.cloud.layer","tcdc_entire.atmosphere","uswrf_top_of_atmosphere","csnow_surface","dlwrf_surface","uswrf_surface","tmp_surface"))
combined_data <- merge(production, weather_wide, by = c("date","hour"), all.x = TRUE, all.y = FALSE)
setDT(combined_data)
combined_data[,datetime := as.POSIXct(paste(combined_data$date,combined_data$hour), format = "%Y-%m-%d %H:%M:%S")]
combined_data[,mon:=as.character(month(date,label=T))]
hour_id <- 0:23
combined_data <- cbind(combined_data,hour_id)
combined_data$is_sun <-0
condition_1 <- combined_data$hour_id > 5
condition_2 <- combined_data$hour_id < 20
combined_data$is_sun[condition_1 & condition_2] = 1
combined_data$avg_temp <- rowMeans(combined_data[,229:253])
combined_data$avg_DSWRF <- rowMeans(combined_data[,4:28])
combined_data$avg_lcloud <- rowMeans(combined_data[,29:53])
combined_data$avg_mcloud <- rowMeans(combined_data[,54:78])
combined_data$avg_hcloud <- rowMeans(combined_data[,79:103])
combined_data$avg_tcloud <- rowMeans(combined_data[,104:128])
combined_data$avg_USWRFtop <- rowMeans(combined_data[,129:153])
combined_data$avg_DLWRFsurf <- rowMeans(combined_data[,179:203])
combined_data$avg_USWRFsurf <- rowMeans(combined_data[,204:228])
combined_data$avg_CSNOW <- rowMeans(combined_data[,154:178])
daily_productions <- production %>%
group_by(date) %>%
summarize(daily_prod = sum(production))
setDT(daily_productions)
weekly_productions <- production %>%
group_by(year = year(date), week = isoweek(date)) %>%
summarize(weekly_prod = sum(production))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
setDT(weekly_productions)
weekly_productions[,weeks:=1:.N]
monthly_productions <- production %>%
group_by(year = year(date), month = month(date)) %>%
summarize(monthly_prod = sum(production))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
setDT(monthly_productions)
monthly_productions[,months:=1:.N]
ggplot(combined_data, aes(x = date, y = production)) + geom_line() + labs(title = "Hourly Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(daily_productions, aes(x = date, y = daily_prod)) + geom_line() + labs(title = "Daily Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(weekly_productions, aes(x = weeks, y = weekly_prod)) + geom_line() + labs(title = "Weekly Energy Production Data", x = "Date", y = "Energy Production ")

ggplot(monthly_productions, aes(x = months, y = monthly_prod)) + geom_line()+ labs(title = "Monthly Energy Production Data", x = "Date", y = "Energy Production ")

plot(acf(combined_data$production,100), main = "Hourly Energy Production Autocorrelation")


plot(acf(daily_productions$daily_prod,365), main = "Daily Energy Production Autocorrelation")


plot(acf(weekly_productions$weekly_prod,108), main = "Weekly Energy Production Autocorrelation")


plot(acf(monthly_productions$monthly_prod), main = "Monthly Energy Production Autocorrelation")


plot(pacf(combined_data$production,72), main = "Hourly Energy Production Partial Autocorrelation")


plot(pacf(daily_productions$daily_prod), main = "Daily Energy Production Partial Autocorrelation")


production_available <- combined_data %>%
filter(is_sun == 1)
ggplot(production_available[date == "2023-05-15"], aes(x = datetime, y = production)) + geom_line()

ggplot(combined_data[date > "2024-02-01" & date < "2024-05-15"], aes(x = datetime, y = production)) + geom_line() + labs(title = "Hourly Energy Production Data 01.02.2024-15.05.2024", x = "Date", y = "Energy Production ")

corr_check <- data.frame(production_available$production, production_available$datetime,production_available$avg_DLWRFsurf, production_available$avg_temp, production_available$avg_hcloud, production_available$avg_lcloud, production_available$avg_mcloud, production_available$avg_tcloud, production_available$avg_USWRFsurf, production_available$avg_USWRFtop, production_available$avg_CSNOW, production_available$avg_DSWRF)
ggpairs(corr_check)
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 4 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 3 rows containing missing values
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_density()`).

ggplot(production_available, aes(x = avg_temp, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Temp", x = "Average Temp", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_DSWRF, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average DSWRF Surface", x = "Average DSWRF", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_tcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Total Cloud", x = "Average Total Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_hcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average High Cloud", x = "Average High Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_mcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Middle Cloud", x = "Average Middle Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_lcloud, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Low Cloud", x = "Average Low Cloud", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_USWRFsurf, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average USWRF Surface", x = "Average USWRF Surface", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_USWRFtop, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average USWRF Atmosphere", x = "Average USWRF Top", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_DLWRFsurf, y = production)) + geom_line() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average DLWRF Surface", x = "Average DLWRF", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

ggplot(production_available, aes(x = avg_CSNOW, y = production)) + geom_point() + geom_smooth(method = "loess") + labs(title = "Hourly Energy Production vs. Average Snow", x = "Average Snow", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : at -0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : radius 2.5e-05
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : pseudoinverse used at -0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : neighborhood radius 0.005
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
## : zero-width neighborhood. make span bigger
## Warning: Failed to fit group -1.
## Caused by error in `predLoess()`:
## ! NA/NaN/Inf in foreign function call (arg 5)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(production_available, aes(x = datetime, y = production)) + geom_line() + geom_smooth(method = "loess") + geom_smooth(method = "lm") + labs(title = "Hourly Energy Production Smoothed", x = "Time", y = "Energy Production ")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

production_available[,Lag14:= shift(production,14)]
## Warning: Invalid .internal.selfref detected and fixed by taking a (shallow)
## copy of the data.table so that := can add this new column by reference. At an
## earlier point, this data.table has been copied by R (or was created manually
## using structure() or similar). Avoid names<- and attr<- which in R currently
## (and oddly) may copy the whole data.table. Use set* syntax instead to avoid
## copying: ?set, ?setnames and ?setattr. If this message doesn't help, please
## report your use case to the data.table issue tracker so the root cause can be
## fixed or this message improved.
production_available[,Lag1 := shift(production,1)]
production_available[,diff14 := production-Lag14]
Test_date_start = "2024-02-01"
Test_date_end = "2024-05-15"
model_data_tr <- production_available[date < Test_date_start]
model_data_test <- production_available[date >= Test_date_start & date <= Test_date_end]
tsr1 <- lm(production ~ mon + as.factor(hour_id) + Lag14 + Lag1, model_data_tr)
summary(tsr1)
##
## Call:
## lm(formula = production ~ mon + as.factor(hour_id) + Lag14 +
## Lag1, data = model_data_tr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8897 -0.6897 0.0730 0.7902 8.6023
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.488431 0.071663 6.816 9.89e-12 ***
## monAug 0.237178 0.071359 3.324 0.000891 ***
## monDec -0.223461 0.071485 -3.126 0.001777 **
## monFeb -0.077109 0.072529 -1.063 0.287740
## monJan -0.300394 0.066204 -4.537 5.76e-06 ***
## monJul 0.274782 0.071707 3.832 0.000128 ***
## monJun 0.112372 0.071191 1.578 0.114489
## monMar -0.089938 0.070668 -1.273 0.203158
## monMay 0.071019 0.070524 1.007 0.313946
## monNov -0.177527 0.071579 -2.480 0.013148 *
## monOct 0.014136 0.070471 0.201 0.841023
## monSep 0.187090 0.071629 2.612 0.009016 **
## as.factor(hour_id)7 1.435912 0.076003 18.893 < 2e-16 ***
## as.factor(hour_id)8 2.187998 0.081315 26.908 < 2e-16 ***
## as.factor(hour_id)9 1.995202 0.089157 22.379 < 2e-16 ***
## as.factor(hour_id)10 1.196201 0.094637 12.640 < 2e-16 ***
## as.factor(hour_id)11 0.879963 0.096433 9.125 < 2e-16 ***
## as.factor(hour_id)12 0.720929 0.096501 7.471 8.60e-14 ***
## as.factor(hour_id)13 0.396919 0.095199 4.169 3.08e-05 ***
## as.factor(hour_id)14 -0.210469 0.091806 -2.293 0.021893 *
## as.factor(hour_id)15 -0.962148 0.086275 -11.152 < 2e-16 ***
## as.factor(hour_id)16 -1.579514 0.080379 -19.651 < 2e-16 ***
## as.factor(hour_id)17 -1.361516 0.076353 -17.832 < 2e-16 ***
## as.factor(hour_id)18 -1.020975 0.075107 -13.594 < 2e-16 ***
## as.factor(hour_id)19 -0.530419 0.074803 -7.091 1.42e-12 ***
## Lag14 0.163682 0.006824 23.988 < 2e-16 ***
## Lag1 0.677217 0.006832 99.123 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.456 on 10613 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.8649, Adjusted R-squared: 0.8646
## F-statistic: 2613 on 26 and 10613 DF, p-value: < 2.2e-16
checkresiduals(tsr1)

##
## Breusch-Godfrey test for serial correlation of order up to 30
##
## data: Residuals
## LM test = 774.59, df = 30, p-value < 2.2e-16
prediction_data_tsr = copy(model_data_test)
prediction_data_tsr[, actual:= model_data_test$production]
prediction_data_tsr[, prediction_model1 := predict(tsr1,model_data_test)]
prediction_data_tsr$prediction_model1[prediction_data_tsr$prediction_model1 < 0] = 0
prediction_data_tsr[, residual_model1 := actual - prediction_model1]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model1, color = "model 1"))

tsr2 <- lm(production ~ -1 +mon + as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_mcloud + avg_hcloud + avg_tcloud + avg_USWRFtop + avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr2)
##
## Call:
## lm(formula = production ~ -1 + mon + as.factor(hour_id) + Lag14 +
## Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_mcloud + avg_hcloud +
## avg_tcloud + avg_USWRFtop + avg_DLWRFsurf + avg_USWRFsurf +
## avg_CSNOW, data = model_data_tr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0197 -0.7158 0.0781 0.7668 8.0608
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## monApr -6.2325540 1.5064692 -4.137 3.54e-05 ***
## monAug -6.0827269 1.5438259 -3.940 8.20e-05 ***
## monDec -6.9003819 1.4722342 -4.687 2.81e-06 ***
## monFeb -6.5021370 1.4640603 -4.441 9.04e-06 ***
## monJan -6.9197600 1.4682702 -4.713 2.47e-06 ***
## monJul -5.9998300 1.5300240 -3.921 8.86e-05 ***
## monJun -5.9474117 1.5146957 -3.926 8.67e-05 ***
## monMar -6.3202323 1.4862227 -4.253 2.13e-05 ***
## monMay -6.0845687 1.5117701 -4.025 5.74e-05 ***
## monNov -6.7954957 1.4878781 -4.567 5.00e-06 ***
## monOct -6.5232630 1.5070606 -4.328 1.52e-05 ***
## monSep -6.3087997 1.5296954 -4.124 3.75e-05 ***
## as.factor(hour_id)7 1.4339621 0.0748292 19.163 < 2e-16 ***
## as.factor(hour_id)8 2.2528233 0.0833386 27.032 < 2e-16 ***
## as.factor(hour_id)9 2.1429245 0.0992002 21.602 < 2e-16 ***
## as.factor(hour_id)10 1.9946090 0.1505606 13.248 < 2e-16 ***
## as.factor(hour_id)11 1.7473705 0.1655432 10.555 < 2e-16 ***
## as.factor(hour_id)12 1.6441303 0.1761312 9.335 < 2e-16 ***
## as.factor(hour_id)13 1.3667894 0.1816623 7.524 5.75e-14 ***
## as.factor(hour_id)14 0.7955475 0.1818304 4.375 1.22e-05 ***
## as.factor(hour_id)15 0.0448128 0.1771898 0.253 0.800344
## as.factor(hour_id)16 -0.7263122 0.1547507 -4.693 2.72e-06 ***
## as.factor(hour_id)17 -0.6585855 0.1364411 -4.827 1.41e-06 ***
## as.factor(hour_id)18 -0.4325847 0.1181362 -3.662 0.000252 ***
## as.factor(hour_id)19 -0.0285958 0.1037890 -0.276 0.782923
## Lag14 0.1591265 0.0068441 23.250 < 2e-16 ***
## Lag1 0.6224335 0.0078475 79.316 < 2e-16 ***
## avg_temp 0.0348264 0.0061725 5.642 1.72e-08 ***
## avg_DSWRF -0.0018410 0.0002163 -8.513 < 2e-16 ***
## avg_lcloud 0.0014841 0.0011342 1.308 0.190736
## avg_mcloud -0.0016616 0.0008028 -2.070 0.038511 *
## avg_hcloud 0.0003151 0.0008407 0.375 0.707853
## avg_tcloud -0.0040692 0.0010503 -3.874 0.000108 ***
## avg_USWRFtop -0.0002658 0.0003787 -0.702 0.482779
## avg_DLWRFsurf -0.0094946 0.0011758 -8.075 7.47e-16 ***
## avg_USWRFsurf 0.0010828 0.0005541 1.954 0.050699 .
## avg_CSNOW -0.2428656 0.1122221 -2.164 0.030475 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 10599 degrees of freedom
## (18 observations deleted due to missingness)
## Multiple R-squared: 0.9427, Adjusted R-squared: 0.9425
## F-statistic: 4713 on 37 and 10599 DF, p-value: < 2.2e-16
checkresiduals(tsr2)

##
## Breusch-Godfrey test for serial correlation of order up to 40
##
## data: Residuals
## LM test = 837.64, df = 40, p-value < 2.2e-16
prediction_data_tsr[, prediction_model2 := predict(tsr2,model_data_test)]
prediction_data_tsr$prediction_model2[prediction_data_tsr$prediction_model2 < 0] = 0
prediction_data_tsr[, residual_model2 := actual - prediction_model2]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model2, color = "model 2"))

tsr3 <- lm(production ~ -1 +mon + as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_mcloud + avg_tcloud + avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr3)
##
## Call:
## lm(formula = production ~ -1 + mon + as.factor(hour_id) + Lag14 +
## Lag1 + avg_temp + avg_DSWRF + avg_mcloud + avg_tcloud + avg_DLWRFsurf +
## avg_USWRFsurf + avg_CSNOW, data = model_data_tr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.0155 -0.7242 0.0792 0.7732 8.0814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## monApr -5.8846140 1.3035009 -4.514 6.42e-06 ***
## monAug -5.7429828 1.3455591 -4.268 1.99e-05 ***
## monDec -6.5311525 1.2882731 -5.070 4.05e-07 ***
## monFeb -6.1402056 1.2729887 -4.823 1.43e-06 ***
## monJan -6.5499829 1.2812006 -5.112 3.24e-07 ***
## monJul -5.6573904 1.3270224 -4.263 2.03e-05 ***
## monJun -5.6116927 1.3103816 -4.282 1.86e-05 ***
## monMar -5.9629716 1.2871068 -4.633 3.65e-06 ***
## monMay -5.7412767 1.3046463 -4.401 1.09e-05 ***
## monNov -6.4302950 1.3005722 -4.944 7.76e-07 ***
## monOct -6.1679612 1.3153529 -4.689 2.78e-06 ***
## monSep -5.9588546 1.3324405 -4.472 7.82e-06 ***
## as.factor(hour_id)7 1.4363436 0.0744875 19.283 < 2e-16 ***
## as.factor(hour_id)8 2.2580297 0.0808278 27.936 < 2e-16 ***
## as.factor(hour_id)9 2.1497321 0.0915056 23.493 < 2e-16 ***
## as.factor(hour_id)10 1.9578034 0.1081488 18.103 < 2e-16 ***
## as.factor(hour_id)11 1.7081429 0.1148161 14.877 < 2e-16 ***
## as.factor(hour_id)12 1.5980308 0.1195390 13.368 < 2e-16 ***
## as.factor(hour_id)13 1.3216388 0.1217332 10.857 < 2e-16 ***
## as.factor(hour_id)14 0.7461996 0.1207838 6.178 6.73e-10 ***
## as.factor(hour_id)15 -0.0094597 0.1170861 -0.081 0.9356
## as.factor(hour_id)16 -0.7783211 0.1030807 -7.551 4.69e-14 ***
## as.factor(hour_id)17 -0.7084445 0.0947727 -7.475 8.32e-14 ***
## as.factor(hour_id)18 -0.4777300 0.0885034 -5.398 6.89e-08 ***
## as.factor(hour_id)19 -0.0678920 0.0840929 -0.807 0.4195
## Lag14 0.1581962 0.0068244 23.181 < 2e-16 ***
## Lag1 0.6226021 0.0078272 79.544 < 2e-16 ***
## avg_temp 0.0332317 0.0052299 6.354 2.18e-10 ***
## avg_DSWRF -0.0017787 0.0002011 -8.846 < 2e-16 ***
## avg_mcloud -0.0018571 0.0007863 -2.362 0.0182 *
## avg_tcloud -0.0036635 0.0007093 -5.165 2.45e-07 ***
## avg_DLWRFsurf -0.0090989 0.0009695 -9.386 < 2e-16 ***
## avg_USWRFsurf 0.0008952 0.0004951 1.808 0.0706 .
## avg_CSNOW -0.1613856 0.0972504 -1.659 0.0970 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.423 on 10603 degrees of freedom
## (17 observations deleted due to missingness)
## Multiple R-squared: 0.9427, Adjusted R-squared: 0.9425
## F-statistic: 5127 on 34 and 10603 DF, p-value: < 2.2e-16
checkresiduals(tsr3)

##
## Breusch-Godfrey test for serial correlation of order up to 37
##
## data: Residuals
## LM test = 824.68, df = 37, p-value < 2.2e-16
prediction_data_tsr[, prediction_model3 := predict(tsr3,model_data_test)]
prediction_data_tsr$prediction_model3[prediction_data_tsr$prediction_model3 < 0] = 0
prediction_data_tsr[, residual_model3 := actual - prediction_model3]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model3, color = "model 3"))

tsr4 <- lm(production ~ -1 +mon:as.factor(hour_id) + Lag14 + Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_tcloud + avg_DLWRFsurf + avg_USWRFsurf + avg_CSNOW, model_data_tr)
summary(tsr4)
##
## Call:
## lm(formula = production ~ -1 + mon:as.factor(hour_id) + Lag14 +
## Lag1 + avg_temp + avg_DSWRF + avg_lcloud + avg_tcloud + avg_DLWRFsurf +
## avg_USWRFsurf + avg_CSNOW, data = model_data_tr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7083 -0.5642 0.0647 0.6801 8.5590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Lag14 9.524e-02 6.821e-03 13.963 < 2e-16 ***
## Lag1 6.371e-01 7.797e-03 81.712 < 2e-16 ***
## avg_temp 9.127e-02 7.887e-03 11.573 < 2e-16 ***
## avg_DSWRF -4.300e-03 3.990e-04 -10.777 < 2e-16 ***
## avg_lcloud 1.926e-03 9.408e-04 2.047 0.0407 *
## avg_tcloud -3.565e-03 6.315e-04 -5.646 1.68e-08 ***
## avg_DLWRFsurf -1.521e-02 1.276e-03 -11.919 < 2e-16 ***
## avg_USWRFsurf 5.044e-03 6.704e-04 7.523 5.78e-14 ***
## avg_CSNOW -1.887e-01 1.043e-01 -1.810 0.0704 .
## monApr:as.factor(hour_id)6 -2.060e+01 1.937e+00 -10.634 < 2e-16 ***
## monAug:as.factor(hour_id)6 -2.049e+01 1.952e+00 -10.498 < 2e-16 ***
## monDec:as.factor(hour_id)6 -2.082e+01 1.917e+00 -10.860 < 2e-16 ***
## monFeb:as.factor(hour_id)6 -2.052e+01 1.894e+00 -10.831 < 2e-16 ***
## monJan:as.factor(hour_id)6 -2.073e+01 1.906e+00 -10.878 < 2e-16 ***
## monJul:as.factor(hour_id)6 -1.984e+01 1.961e+00 -10.117 < 2e-16 ***
## monJun:as.factor(hour_id)6 -1.985e+01 1.959e+00 -10.133 < 2e-16 ***
## monMar:as.factor(hour_id)6 -2.057e+01 1.915e+00 -10.743 < 2e-16 ***
## monMay:as.factor(hour_id)6 -2.060e+01 1.948e+00 -10.571 < 2e-16 ***
## monNov:as.factor(hour_id)6 -2.090e+01 1.929e+00 -10.836 < 2e-16 ***
## monOct:as.factor(hour_id)6 -2.077e+01 1.941e+00 -10.699 < 2e-16 ***
## monSep:as.factor(hour_id)6 -2.051e+01 1.948e+00 -10.530 < 2e-16 ***
## monApr:as.factor(hour_id)7 -1.827e+01 1.948e+00 -9.379 < 2e-16 ***
## monAug:as.factor(hour_id)7 -1.925e+01 1.984e+00 -9.703 < 2e-16 ***
## monDec:as.factor(hour_id)7 -2.042e+01 1.916e+00 -10.660 < 2e-16 ***
## monFeb:as.factor(hour_id)7 -1.970e+01 1.893e+00 -10.406 < 2e-16 ***
## monJan:as.factor(hour_id)7 -2.053e+01 1.905e+00 -10.777 < 2e-16 ***
## monJul:as.factor(hour_id)7 -1.843e+01 1.987e+00 -9.273 < 2e-16 ***
## monJun:as.factor(hour_id)7 -1.828e+01 1.978e+00 -9.243 < 2e-16 ***
## monMar:as.factor(hour_id)7 -1.884e+01 1.915e+00 -9.839 < 2e-16 ***
## monMay:as.factor(hour_id)7 -1.825e+01 1.970e+00 -9.267 < 2e-16 ***
## monNov:as.factor(hour_id)7 -1.940e+01 1.926e+00 -10.073 < 2e-16 ***
## monOct:as.factor(hour_id)7 -1.809e+01 1.938e+00 -9.332 < 2e-16 ***
## monSep:as.factor(hour_id)7 -1.833e+01 1.959e+00 -9.356 < 2e-16 ***
## monApr:as.factor(hour_id)8 -1.905e+01 1.968e+00 -9.680 < 2e-16 ***
## monAug:as.factor(hour_id)8 -1.820e+01 2.026e+00 -8.984 < 2e-16 ***
## monDec:as.factor(hour_id)8 -1.804e+01 1.914e+00 -9.429 < 2e-16 ***
## monFeb:as.factor(hour_id)8 -1.776e+01 1.896e+00 -9.365 < 2e-16 ***
## monJan:as.factor(hour_id)8 -1.855e+01 1.903e+00 -9.743 < 2e-16 ***
## monJul:as.factor(hour_id)8 -1.837e+01 2.011e+00 -9.136 < 2e-16 ***
## monJun:as.factor(hour_id)8 -1.867e+01 1.992e+00 -9.372 < 2e-16 ***
## monMar:as.factor(hour_id)8 -1.768e+01 1.927e+00 -9.177 < 2e-16 ***
## monMay:as.factor(hour_id)8 -1.838e+01 1.985e+00 -9.257 < 2e-16 ***
## monNov:as.factor(hour_id)8 -1.797e+01 1.932e+00 -9.301 < 2e-16 ***
## monOct:as.factor(hour_id)8 -1.774e+01 1.959e+00 -9.056 < 2e-16 ***
## monSep:as.factor(hour_id)8 -1.821e+01 1.998e+00 -9.111 < 2e-16 ***
## monApr:as.factor(hour_id)9 -1.845e+01 1.985e+00 -9.292 < 2e-16 ***
## monAug:as.factor(hour_id)9 -1.927e+01 2.057e+00 -9.369 < 2e-16 ***
## monDec:as.factor(hour_id)9 -1.780e+01 1.929e+00 -9.226 < 2e-16 ***
## monFeb:as.factor(hour_id)9 -1.723e+01 1.915e+00 -8.995 < 2e-16 ***
## monJan:as.factor(hour_id)9 -1.825e+01 1.917e+00 -9.521 < 2e-16 ***
## monJul:as.factor(hour_id)9 -1.876e+01 2.029e+00 -9.247 < 2e-16 ***
## monJun:as.factor(hour_id)9 -1.870e+01 2.003e+00 -9.335 < 2e-16 ***
## monMar:as.factor(hour_id)9 -1.820e+01 1.943e+00 -9.364 < 2e-16 ***
## monMay:as.factor(hour_id)9 -1.895e+01 1.998e+00 -9.487 < 2e-16 ***
## monNov:as.factor(hour_id)9 -1.839e+01 1.956e+00 -9.401 < 2e-16 ***
## monOct:as.factor(hour_id)9 -1.861e+01 1.989e+00 -9.357 < 2e-16 ***
## monSep:as.factor(hour_id)9 -1.863e+01 2.031e+00 -9.169 < 2e-16 ***
## monApr:as.factor(hour_id)10 -1.793e+01 1.911e+00 -9.381 < 2e-16 ***
## monAug:as.factor(hour_id)10 -1.795e+01 1.970e+00 -9.114 < 2e-16 ***
## monDec:as.factor(hour_id)10 -1.833e+01 1.919e+00 -9.548 < 2e-16 ***
## monFeb:as.factor(hour_id)10 -1.809e+01 1.907e+00 -9.483 < 2e-16 ***
## monJan:as.factor(hour_id)10 -1.872e+01 1.911e+00 -9.794 < 2e-16 ***
## monJul:as.factor(hour_id)10 -1.790e+01 1.931e+00 -9.273 < 2e-16 ***
## monJun:as.factor(hour_id)10 -1.785e+01 1.902e+00 -9.386 < 2e-16 ***
## monMar:as.factor(hour_id)10 -1.827e+01 1.909e+00 -9.568 < 2e-16 ***
## monMay:as.factor(hour_id)10 -1.798e+01 1.903e+00 -9.449 < 2e-16 ***
## monNov:as.factor(hour_id)10 -1.867e+01 1.929e+00 -9.679 < 2e-16 ***
## monOct:as.factor(hour_id)10 -1.840e+01 1.941e+00 -9.480 < 2e-16 ***
## monSep:as.factor(hour_id)10 -1.832e+01 1.962e+00 -9.341 < 2e-16 ***
## monApr:as.factor(hour_id)11 -1.829e+01 1.914e+00 -9.558 < 2e-16 ***
## monAug:as.factor(hour_id)11 -1.841e+01 1.978e+00 -9.307 < 2e-16 ***
## monDec:as.factor(hour_id)11 -1.883e+01 1.927e+00 -9.774 < 2e-16 ***
## monFeb:as.factor(hour_id)11 -1.822e+01 1.914e+00 -9.518 < 2e-16 ***
## monJan:as.factor(hour_id)11 -1.913e+01 1.919e+00 -9.971 < 2e-16 ***
## monJul:as.factor(hour_id)11 -1.801e+01 1.936e+00 -9.305 < 2e-16 ***
## monJun:as.factor(hour_id)11 -1.803e+01 1.903e+00 -9.478 < 2e-16 ***
## monMar:as.factor(hour_id)11 -1.846e+01 1.915e+00 -9.641 < 2e-16 ***
## monMay:as.factor(hour_id)11 -1.840e+01 1.906e+00 -9.655 < 2e-16 ***
## monNov:as.factor(hour_id)11 -1.892e+01 1.938e+00 -9.765 < 2e-16 ***
## monOct:as.factor(hour_id)11 -1.865e+01 1.951e+00 -9.557 < 2e-16 ***
## monSep:as.factor(hour_id)11 -1.854e+01 1.972e+00 -9.397 < 2e-16 ***
## monApr:as.factor(hour_id)12 -1.831e+01 1.914e+00 -9.566 < 2e-16 ***
## monAug:as.factor(hour_id)12 -1.816e+01 1.981e+00 -9.168 < 2e-16 ***
## monDec:as.factor(hour_id)12 -1.920e+01 1.931e+00 -9.944 < 2e-16 ***
## monFeb:as.factor(hour_id)12 -1.893e+01 1.917e+00 -9.873 < 2e-16 ***
## monJan:as.factor(hour_id)12 -1.942e+01 1.922e+00 -10.101 < 2e-16 ***
## monJul:as.factor(hour_id)12 -1.798e+01 1.937e+00 -9.281 < 2e-16 ***
## monJun:as.factor(hour_id)12 -1.810e+01 1.900e+00 -9.526 < 2e-16 ***
## monMar:as.factor(hour_id)12 -1.859e+01 1.917e+00 -9.698 < 2e-16 ***
## monMay:as.factor(hour_id)12 -1.780e+01 1.904e+00 -9.351 < 2e-16 ***
## monNov:as.factor(hour_id)12 -1.932e+01 1.942e+00 -9.945 < 2e-16 ***
## monOct:as.factor(hour_id)12 -1.896e+01 1.955e+00 -9.697 < 2e-16 ***
## monSep:as.factor(hour_id)12 -1.863e+01 1.976e+00 -9.427 < 2e-16 ***
## monApr:as.factor(hour_id)13 -1.881e+01 1.911e+00 -9.844 < 2e-16 ***
## monAug:as.factor(hour_id)13 -1.864e+01 1.975e+00 -9.435 < 2e-16 ***
## monDec:as.factor(hour_id)13 -1.993e+01 1.930e+00 -10.329 < 2e-16 ***
## monFeb:as.factor(hour_id)13 -1.878e+01 1.917e+00 -9.798 < 2e-16 ***
## monJan:as.factor(hour_id)13 -1.956e+01 1.921e+00 -10.182 < 2e-16 ***
## monJul:as.factor(hour_id)13 -1.798e+01 1.933e+00 -9.302 < 2e-16 ***
## monJun:as.factor(hour_id)13 -1.794e+01 1.895e+00 -9.471 < 2e-16 ***
## monMar:as.factor(hour_id)13 -1.899e+01 1.916e+00 -9.910 < 2e-16 ***
## monMay:as.factor(hour_id)13 -1.813e+01 1.896e+00 -9.562 < 2e-16 ***
## monNov:as.factor(hour_id)13 -2.001e+01 1.942e+00 -10.303 < 2e-16 ***
## monOct:as.factor(hour_id)13 -1.934e+01 1.953e+00 -9.905 < 2e-16 ***
## monSep:as.factor(hour_id)13 -1.858e+01 1.972e+00 -9.423 < 2e-16 ***
## monApr:as.factor(hour_id)14 -1.869e+01 1.905e+00 -9.811 < 2e-16 ***
## monAug:as.factor(hour_id)14 -1.894e+01 1.963e+00 -9.646 < 2e-16 ***
## monDec:as.factor(hour_id)14 -2.115e+01 1.926e+00 -10.982 < 2e-16 ***
## monFeb:as.factor(hour_id)14 -1.956e+01 1.914e+00 -10.220 < 2e-16 ***
## monJan:as.factor(hour_id)14 -2.026e+01 1.917e+00 -10.567 < 2e-16 ***
## monJul:as.factor(hour_id)14 -1.836e+01 1.926e+00 -9.532 < 2e-16 ***
## monJun:as.factor(hour_id)14 -1.855e+01 1.886e+00 -9.836 < 2e-16 ***
## monMar:as.factor(hour_id)14 -1.901e+01 1.912e+00 -9.943 < 2e-16 ***
## monMay:as.factor(hour_id)14 -1.861e+01 1.888e+00 -9.860 < 2e-16 ***
## monNov:as.factor(hour_id)14 -2.045e+01 1.936e+00 -10.565 < 2e-16 ***
## monOct:as.factor(hour_id)14 -2.042e+01 1.944e+00 -10.504 < 2e-16 ***
## monSep:as.factor(hour_id)14 -1.957e+01 1.961e+00 -9.978 < 2e-16 ***
## monApr:as.factor(hour_id)15 -1.923e+01 1.893e+00 -10.159 < 2e-16 ***
## monAug:as.factor(hour_id)15 -1.953e+01 1.946e+00 -10.036 < 2e-16 ***
## monDec:as.factor(hour_id)15 -2.182e+01 1.919e+00 -11.375 < 2e-16 ***
## monFeb:as.factor(hour_id)15 -2.090e+01 1.911e+00 -10.938 < 2e-16 ***
## monJan:as.factor(hour_id)15 -2.114e+01 1.910e+00 -11.071 < 2e-16 ***
## monJul:as.factor(hour_id)15 -1.930e+01 1.915e+00 -10.077 < 2e-16 ***
## monJun:as.factor(hour_id)15 -1.867e+01 1.875e+00 -9.955 < 2e-16 ***
## monMar:as.factor(hour_id)15 -1.994e+01 1.905e+00 -10.464 < 2e-16 ***
## monMay:as.factor(hour_id)15 -1.852e+01 1.878e+00 -9.861 < 2e-16 ***
## monNov:as.factor(hour_id)15 -2.174e+01 1.926e+00 -11.288 < 2e-16 ***
## monOct:as.factor(hour_id)15 -2.137e+01 1.932e+00 -11.058 < 2e-16 ***
## monSep:as.factor(hour_id)15 -2.054e+01 1.947e+00 -10.550 < 2e-16 ***
## monApr:as.factor(hour_id)16 -2.034e+01 1.904e+00 -10.685 < 2e-16 ***
## monAug:as.factor(hour_id)16 -2.077e+01 1.951e+00 -10.645 < 2e-16 ***
## monDec:as.factor(hour_id)16 -2.168e+01 1.926e+00 -11.254 < 2e-16 ***
## monFeb:as.factor(hour_id)16 -2.167e+01 1.907e+00 -11.364 < 2e-16 ***
## monJan:as.factor(hour_id)16 -2.169e+01 1.910e+00 -11.352 < 2e-16 ***
## monJul:as.factor(hour_id)16 -2.064e+01 1.919e+00 -10.759 < 2e-16 ***
## monJun:as.factor(hour_id)16 -2.018e+01 1.903e+00 -10.603 < 2e-16 ***
## monMar:as.factor(hour_id)16 -2.124e+01 1.907e+00 -11.133 < 2e-16 ***
## monMay:as.factor(hour_id)16 -2.022e+01 1.901e+00 -10.641 < 2e-16 ***
## monNov:as.factor(hour_id)16 -2.189e+01 1.941e+00 -11.274 < 2e-16 ***
## monOct:as.factor(hour_id)16 -2.219e+01 1.951e+00 -11.373 < 2e-16 ***
## monSep:as.factor(hour_id)16 -2.201e+01 1.956e+00 -11.254 < 2e-16 ***
## monApr:as.factor(hour_id)17 -2.081e+01 1.904e+00 -10.931 < 2e-16 ***
## monAug:as.factor(hour_id)17 -2.168e+01 1.944e+00 -11.149 < 2e-16 ***
## monDec:as.factor(hour_id)17 -2.075e+01 1.913e+00 -10.847 < 2e-16 ***
## monFeb:as.factor(hour_id)17 -2.136e+01 1.901e+00 -11.235 < 2e-16 ***
## monJan:as.factor(hour_id)17 -2.076e+01 1.901e+00 -10.916 < 2e-16 ***
## monJul:as.factor(hour_id)17 -2.170e+01 1.918e+00 -11.315 < 2e-16 ***
## monJun:as.factor(hour_id)17 -2.073e+01 1.905e+00 -10.883 < 2e-16 ***
## monMar:as.factor(hour_id)17 -2.114e+01 1.907e+00 -11.086 < 2e-16 ***
## monMay:as.factor(hour_id)17 -2.071e+01 1.901e+00 -10.891 < 2e-16 ***
## monNov:as.factor(hour_id)17 -2.097e+01 1.931e+00 -10.858 < 2e-16 ***
## monOct:as.factor(hour_id)17 -2.144e+01 1.944e+00 -11.029 < 2e-16 ***
## monSep:as.factor(hour_id)17 -2.214e+01 1.948e+00 -11.364 < 2e-16 ***
## monApr:as.factor(hour_id)18 -2.124e+01 1.902e+00 -11.168 < 2e-16 ***
## monAug:as.factor(hour_id)18 -2.183e+01 1.934e+00 -11.284 < 2e-16 ***
## monDec:as.factor(hour_id)18 -2.072e+01 1.911e+00 -10.844 < 2e-16 ***
## monFeb:as.factor(hour_id)18 -2.028e+01 1.884e+00 -10.764 < 2e-16 ***
## monJan:as.factor(hour_id)18 -2.042e+01 1.892e+00 -10.793 < 2e-16 ***
## monJul:as.factor(hour_id)18 -2.124e+01 1.914e+00 -11.094 < 2e-16 ***
## monJun:as.factor(hour_id)18 -2.090e+01 1.906e+00 -10.970 < 2e-16 ***
## monMar:as.factor(hour_id)18 -2.032e+01 1.900e+00 -10.694 < 2e-16 ***
## monMay:as.factor(hour_id)18 -2.130e+01 1.902e+00 -11.203 < 2e-16 ***
## monNov:as.factor(hour_id)18 -2.077e+01 1.927e+00 -10.780 < 2e-16 ***
## monOct:as.factor(hour_id)18 -2.075e+01 1.931e+00 -10.747 < 2e-16 ***
## monSep:as.factor(hour_id)18 -2.121e+01 1.935e+00 -10.964 < 2e-16 ***
## monApr:as.factor(hour_id)19 -2.016e+01 1.897e+00 -10.623 < 2e-16 ***
## monAug:as.factor(hour_id)19 -2.052e+01 1.915e+00 -10.712 < 2e-16 ***
## monDec:as.factor(hour_id)19 -2.078e+01 1.914e+00 -10.856 < 2e-16 ***
## monFeb:as.factor(hour_id)19 -2.026e+01 1.882e+00 -10.766 < 2e-16 ***
## monJan:as.factor(hour_id)19 -2.050e+01 1.896e+00 -10.810 < 2e-16 ***
## monJul:as.factor(hour_id)19 -2.052e+01 1.906e+00 -10.766 < 2e-16 ***
## monJun:as.factor(hour_id)19 -2.020e+01 1.905e+00 -10.603 < 2e-16 ***
## monMar:as.factor(hour_id)19 -2.009e+01 1.892e+00 -10.618 < 2e-16 ***
## monMay:as.factor(hour_id)19 -2.012e+01 1.899e+00 -10.596 < 2e-16 ***
## monNov:as.factor(hour_id)19 -2.082e+01 1.929e+00 -10.792 < 2e-16 ***
## monOct:as.factor(hour_id)19 -2.082e+01 1.934e+00 -10.764 < 2e-16 ***
## monSep:as.factor(hour_id)19 -2.058e+01 1.918e+00 -10.733 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.355 on 10460 degrees of freedom
## (17 observations deleted due to missingness)
## Multiple R-squared: 0.9488, Adjusted R-squared: 0.9479
## F-statistic: 1094 on 177 and 10460 DF, p-value: < 2.2e-16
checkresiduals(tsr4)

##
## Breusch-Godfrey test for serial correlation of order up to 180
##
## data: Residuals
## LM test = 552.2, df = 180, p-value < 2.2e-16
prediction_data_tsr[, prediction_model4 := predict(tsr4,model_data_test)]
prediction_data_tsr$prediction_model4[prediction_data_tsr$prediction_model4 < 0] = 0
prediction_data_tsr[, residual_model4 := actual - prediction_model4]
ggplot(prediction_data_tsr, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model4, color = "model 4"))

sum_actual = sum(prediction_data_tsr$actual)
sum_error1 = sum(abs(prediction_data_tsr$residual_model1))
sum_error2 = sum(abs(prediction_data_tsr$residual_model2))
sum_error3 = sum(abs(prediction_data_tsr$residual_model3))
sum_error4 = sum(abs(prediction_data_tsr$residual_model4))
wmape_df <- data_frame(model = c("Model 1", "Model 2", "Model 3", "Model 4"),WMAPE = c(sum_error1/sum_actual, sum_error2/sum_actual, sum_error3/sum_actual, sum_error4/sum_actual), Model_Type = c("Time Series Regression","Time Series Regression","Time Series Regression","Time Series Regression"))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
wmape_df
kpss.test(model_data_tr$production, null = "Trend")
## Warning in kpss.test(model_data_tr$production, null = "Trend"): p-value smaller
## than printed p-value
##
## KPSS Test for Trend Stationarity
##
## data: model_data_tr$production
## KPSS Trend = 2.4359, Truncation lag parameter = 12, p-value = 0.01
ggplot(model_data_tr, aes(x = datetime, y = diff14)) + geom_line() + labs(title = "Diff14 Production Data", x = "Date Time", y = "Production Diff14")
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_line()`).

kpss.test(model_data_tr$diff14, null = "Trend")
## Warning in kpss.test(model_data_tr$diff14, null = "Trend"): p-value greater
## than printed p-value
##
## KPSS Test for Trend Stationarity
##
## data: model_data_tr$diff14
## KPSS Trend = 0.0022202, Truncation lag parameter = 12, p-value = 0.1
plot(acf(model_data_tr[complete.cases(model_data_tr)]$diff14), main = "ACF for Diff14 Production")


plot(pacf(model_data_tr[complete.cases(model_data_tr)]$diff14), main = "PACF for Diff14 Production")


arima_model <- auto.arima(production_available$diff14, seasonal = F, trace = T, stepwise = F, approximation = F)
##
## ARIMA(0,0,0) with zero mean : 56645.24
## ARIMA(0,0,0) with non-zero mean : 56647.24
## ARIMA(0,0,1) with zero mean : 51520.45
## ARIMA(0,0,1) with non-zero mean : 51522.45
## ARIMA(0,0,2) with zero mean : 49765.64
## ARIMA(0,0,2) with non-zero mean : 49767.64
## ARIMA(0,0,3) with zero mean : 49259.44
## ARIMA(0,0,3) with non-zero mean : 49261.44
## ARIMA(0,0,4) with zero mean : 48988.43
## ARIMA(0,0,4) with non-zero mean : 48990.43
## ARIMA(0,0,5) with zero mean : 48936.12
## ARIMA(0,0,5) with non-zero mean : 48938.13
## ARIMA(1,0,0) with zero mean : 48861.46
## ARIMA(1,0,0) with non-zero mean : 48863.47
## ARIMA(1,0,1) with zero mean : 48859.42
## ARIMA(1,0,1) with non-zero mean : 48861.42
## ARIMA(1,0,2) with zero mean : 48853.09
## ARIMA(1,0,2) with non-zero mean : 48855.09
## ARIMA(1,0,3) with zero mean : 48854.23
## ARIMA(1,0,3) with non-zero mean : 48856.24
## ARIMA(1,0,4) with zero mean : 48851.29
## ARIMA(1,0,4) with non-zero mean : 48853.29
## ARIMA(2,0,0) with zero mean : 48859.11
## ARIMA(2,0,0) with non-zero mean : 48861.11
## ARIMA(2,0,1) with zero mean : 48851.28
## ARIMA(2,0,1) with non-zero mean : 48853.28
## ARIMA(2,0,2) with zero mean : 48850.45
## ARIMA(2,0,2) with non-zero mean : 48852.43
## ARIMA(2,0,3) with zero mean : 48852.42
## ARIMA(2,0,3) with non-zero mean : 48854.38
## ARIMA(3,0,0) with zero mean : 48852.47
## ARIMA(3,0,0) with non-zero mean : 48854.47
## ARIMA(3,0,1) with zero mean : 48850.52
## ARIMA(3,0,1) with non-zero mean : 48852.46
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : Inf
## ARIMA(4,0,0) with zero mean : 48853.58
## ARIMA(4,0,0) with non-zero mean : 48855.58
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : Inf
## ARIMA(5,0,0) with zero mean : 48850.23
## ARIMA(5,0,0) with non-zero mean : 48852.23
##
##
##
## Best model: ARIMA(5,0,0) with zero mean
arima_model
## Series: production_available$diff14
## ARIMA(5,0,0) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## 0.674 0.0358 -0.0316 0.0226 -0.0209
## s.e. 0.009 0.0109 0.0109 0.0109 0.0090
##
## sigma^2 = 3.17: log likelihood = -24419.11
## AIC=48850.22 AICc=48850.23 BIC=48894.69
plot(acf(arima_model$residuals), main = "ACF for ARIMA(5,0,0) Residuals")


plot(pacf(arima_model$residuals), main = "PACF for ARIMA(5,0,0) Residuals")


checkresiduals(arima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,0) with zero mean
## Q* = 13.651, df = 5, p-value = 0.01798
##
## Model df: 5. Total lags used: 10
fc_period = length(model_data_test$Lag14)
summary(arima_model)
## Series: production_available$diff14
## ARIMA(5,0,0) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## 0.674 0.0358 -0.0316 0.0226 -0.0209
## s.e. 0.009 0.0109 0.0109 0.0109 0.0090
##
## sigma^2 = 3.17: log likelihood = -24419.11
## AIC=48850.22 AICc=48850.23 BIC=48894.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.0002946464 1.780177 1.097426 NaN Inf 0.9225724 0.0001199595
forecasted_diff_arima <- forecast(arima_model,fc_period)
prediction_data_arima <- copy(model_data_test)
prediction_data_arima[, actual := production]
prediction_data_arima[, prediction_diff_model5 := forecasted_diff_arima$mean]
prediction_data_arima[, prediction_model5 := prediction_diff_model5 + Lag14]
prediction_data_arima[, residual_model5 := actual - prediction_model5]
ggplot(prediction_data_arima, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model5, color = "model 5"))

sum_actual = sum(prediction_data_arima$actual)
sum_error5 = sum(abs(prediction_data_arima$residual_model5))
wmape_df = rbind(wmape_df, c("Model 5", sum_error5/sum_actual, "ARIMA(5,0,0)"))
wmape_df
ts_data <- ts(model_data_tr$diff14, frequency = 14)
sarima_model <- auto.arima(ts_data, seasonal = T, trace = T, stepwise = F, approximation = F)
##
## ARIMA(0,0,0) with zero mean : 49037.71
## ARIMA(0,0,0) with non-zero mean : 49039.71
## ARIMA(0,0,0)(0,0,1)[14] with zero mean : 45355.3
## ARIMA(0,0,0)(0,0,1)[14] with non-zero mean : 45357.29
## ARIMA(0,0,0)(0,0,2)[14] with zero mean : Inf
## ARIMA(0,0,0)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,0)(1,0,0)[14] with zero mean : 47091.93
## ARIMA(0,0,0)(1,0,0)[14] with non-zero mean : 47093.91
## ARIMA(0,0,0)(1,0,1)[14] with zero mean : Inf
## ARIMA(0,0,0)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,0)(1,0,2)[14] with zero mean : Inf
## ARIMA(0,0,0)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,0)(2,0,0)[14] with zero mean : 46505.86
## ARIMA(0,0,0)(2,0,0)[14] with non-zero mean : 46507.86
## ARIMA(0,0,0)(2,0,1)[14] with zero mean : Inf
## ARIMA(0,0,0)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,0)(2,0,2)[14] with zero mean : Inf
## ARIMA(0,0,0)(2,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,1) with zero mean : 44809.25
## ARIMA(0,0,1) with non-zero mean : 44811.25
## ARIMA(0,0,1)(0,0,1)[14] with zero mean : 40354.14
## ARIMA(0,0,1)(0,0,1)[14] with non-zero mean : 40356.12
## ARIMA(0,0,1)(0,0,2)[14] with zero mean : Inf
## ARIMA(0,0,1)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,1)(1,0,0)[14] with zero mean : 42521.71
## ARIMA(0,0,1)(1,0,0)[14] with non-zero mean : 42523.7
## ARIMA(0,0,1)(1,0,1)[14] with zero mean : Inf
## ARIMA(0,0,1)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,1)(1,0,2)[14] with zero mean : Inf
## ARIMA(0,0,1)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,1)(2,0,0)[14] with zero mean : 41827.75
## ARIMA(0,0,1)(2,0,0)[14] with non-zero mean : 41829.75
## ARIMA(0,0,1)(2,0,1)[14] with zero mean : Inf
## ARIMA(0,0,1)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,1)(2,0,2)[14] with zero mean : Inf
## ARIMA(0,0,1)(2,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,2) with zero mean : 43343.38
## ARIMA(0,0,2) with non-zero mean : 43345.38
## ARIMA(0,0,2)(0,0,1)[14] with zero mean : Inf
## ARIMA(0,0,2)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,2)(0,0,2)[14] with zero mean : Inf
## ARIMA(0,0,2)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,2)(1,0,0)[14] with zero mean : 40865.9
## ARIMA(0,0,2)(1,0,0)[14] with non-zero mean : 40867.89
## ARIMA(0,0,2)(1,0,1)[14] with zero mean : Inf
## ARIMA(0,0,2)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,2)(1,0,2)[14] with zero mean : Inf
## ARIMA(0,0,2)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,2)(2,0,0)[14] with zero mean : 40131.26
## ARIMA(0,0,2)(2,0,0)[14] with non-zero mean : 40133.26
## ARIMA(0,0,2)(2,0,1)[14] with zero mean : Inf
## ARIMA(0,0,2)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,3) with zero mean : 42930.45
## ARIMA(0,0,3) with non-zero mean : 42932.45
## ARIMA(0,0,3)(0,0,1)[14] with zero mean : Inf
## ARIMA(0,0,3)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,3)(0,0,2)[14] with zero mean : Inf
## ARIMA(0,0,3)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(0,0,3)(1,0,0)[14] with zero mean : 40361.65
## ARIMA(0,0,3)(1,0,0)[14] with non-zero mean : 40363.64
## ARIMA(0,0,3)(1,0,1)[14] with zero mean : Inf
## ARIMA(0,0,3)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,3)(2,0,0)[14] with zero mean : 39577.93
## ARIMA(0,0,3)(2,0,0)[14] with non-zero mean : 39579.93
## ARIMA(0,0,4) with zero mean : 42707.16
## ARIMA(0,0,4) with non-zero mean : 42709.17
## ARIMA(0,0,4)(0,0,1)[14] with zero mean : Inf
## ARIMA(0,0,4)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(0,0,4)(1,0,0)[14] with zero mean : 40117.35
## ARIMA(0,0,4)(1,0,0)[14] with non-zero mean : 40119.35
## ARIMA(0,0,5) with zero mean : 42659.94
## ARIMA(0,0,5) with non-zero mean : 42661.94
## ARIMA(1,0,0) with zero mean : 42620
## ARIMA(1,0,0) with non-zero mean : 42622
## ARIMA(1,0,0)(0,0,1)[14] with zero mean : Inf
## ARIMA(1,0,0)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,0)(0,0,2)[14] with zero mean : Inf
## ARIMA(1,0,0)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,0)(1,0,0)[14] with zero mean : 39967.77
## ARIMA(1,0,0)(1,0,0)[14] with non-zero mean : 39969.77
## ARIMA(1,0,0)(1,0,1)[14] with zero mean : Inf
## ARIMA(1,0,0)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,0)(1,0,2)[14] with zero mean : Inf
## ARIMA(1,0,0)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,0)(2,0,0)[14] with zero mean : 39139.21
## ARIMA(1,0,0)(2,0,0)[14] with non-zero mean : 39141.21
## ARIMA(1,0,0)(2,0,1)[14] with zero mean : Inf
## ARIMA(1,0,0)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,0)(2,0,2)[14] with zero mean : Inf
## ARIMA(1,0,0)(2,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,1) with zero mean : 42612.56
## ARIMA(1,0,1) with non-zero mean : 42614.56
## ARIMA(1,0,1)(0,0,1)[14] with zero mean : Inf
## ARIMA(1,0,1)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,1)(0,0,2)[14] with zero mean : Inf
## ARIMA(1,0,1)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,1)(1,0,0)[14] with zero mean : 39962.12
## ARIMA(1,0,1)(1,0,0)[14] with non-zero mean : 39964.12
## ARIMA(1,0,1)(1,0,1)[14] with zero mean : Inf
## ARIMA(1,0,1)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,1)(1,0,2)[14] with zero mean : Inf
## ARIMA(1,0,1)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,1)(2,0,0)[14] with zero mean : 39133.78
## ARIMA(1,0,1)(2,0,0)[14] with non-zero mean : 39135.78
## ARIMA(1,0,1)(2,0,1)[14] with zero mean : Inf
## ARIMA(1,0,1)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,2) with zero mean : 42603.68
## ARIMA(1,0,2) with non-zero mean : 42605.68
## ARIMA(1,0,2)(0,0,1)[14] with zero mean : Inf
## ARIMA(1,0,2)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,2)(0,0,2)[14] with zero mean : Inf
## ARIMA(1,0,2)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(1,0,2)(1,0,0)[14] with zero mean : 39950.64
## ARIMA(1,0,2)(1,0,0)[14] with non-zero mean : 39952.64
## ARIMA(1,0,2)(1,0,1)[14] with zero mean : Inf
## ARIMA(1,0,2)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,2)(2,0,0)[14] with zero mean : 39123.58
## ARIMA(1,0,2)(2,0,0)[14] with non-zero mean : 39125.58
## ARIMA(1,0,3) with zero mean : 42605.35
## ARIMA(1,0,3) with non-zero mean : 42607.35
## ARIMA(1,0,3)(0,0,1)[14] with zero mean : Inf
## ARIMA(1,0,3)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(1,0,3)(1,0,0)[14] with zero mean : 39952.21
## ARIMA(1,0,3)(1,0,0)[14] with non-zero mean : 39954.21
## ARIMA(1,0,4) with zero mean : 42600.98
## ARIMA(1,0,4) with non-zero mean : 42602.98
## ARIMA(2,0,0) with zero mean : 42611.65
## ARIMA(2,0,0) with non-zero mean : 42613.65
## ARIMA(2,0,0)(0,0,1)[14] with zero mean : Inf
## ARIMA(2,0,0)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,0)(0,0,2)[14] with zero mean : Inf
## ARIMA(2,0,0)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(2,0,0)(1,0,0)[14] with zero mean : 39961.34
## ARIMA(2,0,0)(1,0,0)[14] with non-zero mean : 39963.34
## ARIMA(2,0,0)(1,0,1)[14] with zero mean : Inf
## ARIMA(2,0,0)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,0)(1,0,2)[14] with zero mean : Inf
## ARIMA(2,0,0)(1,0,2)[14] with non-zero mean : Inf
## ARIMA(2,0,0)(2,0,0)[14] with zero mean : Inf
## ARIMA(2,0,0)(2,0,0)[14] with non-zero mean : Inf
## ARIMA(2,0,0)(2,0,1)[14] with zero mean : Inf
## ARIMA(2,0,0)(2,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,1) with zero mean : 42601.93
## ARIMA(2,0,1) with non-zero mean : 42603.95
## ARIMA(2,0,1)(0,0,1)[14] with zero mean : Inf
## ARIMA(2,0,1)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,1)(0,0,2)[14] with zero mean : Inf
## ARIMA(2,0,1)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(2,0,1)(1,0,0)[14] with zero mean : 39951.86
## ARIMA(2,0,1)(1,0,0)[14] with non-zero mean : 39953.86
## ARIMA(2,0,1)(1,0,1)[14] with zero mean : Inf
## ARIMA(2,0,1)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,1)(2,0,0)[14] with zero mean : 39124.9
## ARIMA(2,0,1)(2,0,0)[14] with non-zero mean : 39126.91
## ARIMA(2,0,2) with zero mean : 42602.32
## ARIMA(2,0,2) with non-zero mean : 42604.32
## ARIMA(2,0,2)(0,0,1)[14] with zero mean : Inf
## ARIMA(2,0,2)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(2,0,2)(1,0,0)[14] with zero mean : 39950.9
## ARIMA(2,0,2)(1,0,0)[14] with non-zero mean : 39952.9
## ARIMA(2,0,3) with zero mean : 42604.28
## ARIMA(2,0,3) with non-zero mean : 42606.28
## ARIMA(3,0,0) with zero mean : 42602.74
## ARIMA(3,0,0) with non-zero mean : 42604.75
## ARIMA(3,0,0)(0,0,1)[14] with zero mean : Inf
## ARIMA(3,0,0)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(3,0,0)(0,0,2)[14] with zero mean : Inf
## ARIMA(3,0,0)(0,0,2)[14] with non-zero mean : Inf
## ARIMA(3,0,0)(1,0,0)[14] with zero mean : 39949.77
## ARIMA(3,0,0)(1,0,0)[14] with non-zero mean : 39951.77
## ARIMA(3,0,0)(1,0,1)[14] with zero mean : Inf
## ARIMA(3,0,0)(1,0,1)[14] with non-zero mean : Inf
## ARIMA(3,0,0)(2,0,0)[14] with zero mean : 39122.98
## ARIMA(3,0,0)(2,0,0)[14] with non-zero mean : 39124.98
## ARIMA(3,0,1) with zero mean : 42602.3
## ARIMA(3,0,1) with non-zero mean : 42604.3
## ARIMA(3,0,1)(0,0,1)[14] with zero mean : Inf
## ARIMA(3,0,1)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(3,0,1)(1,0,0)[14] with zero mean : 39950.86
## ARIMA(3,0,1)(1,0,0)[14] with non-zero mean : 39952.91
## ARIMA(3,0,2) with zero mean : Inf
## ARIMA(3,0,2) with non-zero mean : Inf
## ARIMA(4,0,0) with zero mean : 42604.49
## ARIMA(4,0,0) with non-zero mean : 42606.49
## ARIMA(4,0,0)(0,0,1)[14] with zero mean : Inf
## ARIMA(4,0,0)(0,0,1)[14] with non-zero mean : Inf
## ARIMA(4,0,0)(1,0,0)[14] with zero mean : 39951.51
## ARIMA(4,0,0)(1,0,0)[14] with non-zero mean : 39953.51
## ARIMA(4,0,1) with zero mean : Inf
## ARIMA(4,0,1) with non-zero mean : Inf
## ARIMA(5,0,0) with zero mean : 42600.07
## ARIMA(5,0,0) with non-zero mean : 42602.07
##
##
##
## Best model: ARIMA(3,0,0)(2,0,0)[14] with zero mean
sarima_model
## Series: ts_data
## ARIMA(3,0,0)(2,0,0)[14] with zero mean
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2
## 0.6916 0.0509 -0.0337 -0.6013 -0.2745
## s.e. 0.0097 0.0118 0.0097 0.0094 0.0093
##
## sigma^2 = 2.311: log likelihood = -19555.48
## AIC=39122.97 AICc=39122.98 BIC=39166.6
plot(acf(sarima_model$residuals), main = "ACF for ARIMA(3,0,0)(2,0,0)[14] Residuals")


plot(pacf(sarima_model$residuals), main = "PACF for ARIMA(3,0,0)(2,0,0)[14] Residuals")


checkresiduals(sarima_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0)(2,0,0)[14] with zero mean
## Q* = 321.93, df = 23, p-value < 2.2e-16
##
## Model df: 5. Total lags used: 28
summary(sarima_model)
## Series: ts_data
## ARIMA(3,0,0)(2,0,0)[14] with zero mean
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2
## 0.6916 0.0509 -0.0337 -0.6013 -0.2745
## s.e. 0.0097 0.0118 0.0097 0.0094 0.0093
##
## sigma^2 = 2.311: log likelihood = -19555.48
## AIC=39122.97 AICc=39122.98 BIC=39166.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.000550437 1.519956 0.9709748 NaN Inf 0.4051865 9.730008e-05
forecasted_diff_sarima <- forecast(sarima_model,fc_period)
prediction_data_arima[, prediction_diff_model6 := forecasted_diff_sarima$mean]
prediction_data_arima[, prediction_model6 := prediction_diff_model6 + Lag14]
prediction_data_arima[, residual_model6 := actual - prediction_model6]
ggplot(prediction_data_arima, aes(x = datetime)) + geom_line(aes(y = actual, color = "actual")) + geom_line(aes(y = prediction_model6, color = "model 6"))

sum_error6 = sum(abs(prediction_data_arima$residual_model6))
wmape_df = rbind(wmape_df, c("Model 6", sum_error6/sum_actual, "ARIMA(3,0,0)(2,0,0)[14]"))
wmape_df
Residuals <- data_frame(model_1 = prediction_data_tsr$residual_model1,model_2 = prediction_data_tsr$residual_model2, model_3 = prediction_data_tsr$residual_model3, model_4 = prediction_data_tsr$residual_model4, model_5 = prediction_data_arima$residual_model5, model_6 = prediction_data_arima$residual_model6)
library(tidyr)
Residual_model <- Residuals %>%
pivot_longer(cols = starts_with("model"), names_to = "model", values_to = "residual")
ggplot(Residual_model, aes(x = residual, y = model, fill = model)) +geom_density_ridges() + labs(title = "Residual Distributions of Each Model")
## Picking joint bandwidth of 0.214
